/* Produce the household roster dataset to do summary stats */

/* Root of the project directory; every data path below is built from it. */
local folder "C:\Users\mlevere\OneDrive - Mathematica\Documents\Projects\Nepal\"

/* Load the raw midline SEC03 file and replace the questionnaire codes
   with descriptive variable names. */
use "`folder'/Data/Midline_Raw/SEC03", clear
rename q03_idc id_code
rename q03_301 name
rename q03_302 relationship
rename q03_303 sex
rename q03_304a age_years
rename q03_304b age_months
rename q03_305 literacy
rename q03_306 school
rename q03_307 highest_grade
rename q03_308 marital_status
rename q03_309a occupation_main
rename q03_309b occupation_secondary
rename q03_310 residing
rename q03_311 eligible

/* Household-level counts; a household is identified by (www, hh), and every
   row of a household receives the same value.

   The conditional counts use egen total() of a 0/1 expression rather than
   count() with an if qualifier. With the if qualifier, a household in which
   nobody satisfied the condition ended up with a missing count instead of 0,
   so it dropped out of any summary statistic on that variable.
   The !missing(id_code) term keeps the previous rule that only rows with a
   nonmissing id_code are counted.

   NOTE(review): Stata treats a missing value as larger than any number, so an
   eligible person whose age_years is missing satisfies age_years > 5 and is
   counted as a woman (unchanged from the earlier code) -- confirm intended. */
bysort www hh: egen num_hh_members = count(id_code)
bysort www hh: egen num_hh_members_residing = total(residing == 1 & !missing(id_code))
bysort www hh: egen num_eligible_women = total(eligible == 1 & age_years > 5 & !missing(id_code))
bysort www hh: egen num_eligible_kids = total(eligible == 1 & age_years <= 5 & !missing(id_code))

/* Park the roster in a temporary file so it can be merged back in after
   another dataset has been loaded. */
tempfile midline_raw_fam
save "`midline_raw_fam'", replace

/* Attach household-level fields to each roster row. vdc_name is needed
   because it is the key used below to look up treatment status. */
merge m:1 www hh sn using "`folder'/Data/midline_raw_hh", nogenerate keepusing(vdc_name district vdc_code religion)
save "`midline_raw_fam'", replace


/* Build a one-row-per-VDC table of treatment dummies from the sampling
   file, restricted to rows with sampco == 1. */
use "`folder'/Data/cosampling.dta", clear
keep if sampco == 1
keep vdc control_vdc info_vdc cash_vdc
bysort vdc: keep if _n == 1
rename vdc vdc_name

/* Join the treatment dummies onto the roster by VDC name. */
merge 1:m vdc_name using "`midline_raw_fam'", nogenerate
sort www hh
/* Note: VDCs that appear only in the sampling file bring no roster rows, so
   www is missing for them. Per the original note, the midline is a 25%
   sample, so there are many such VDCs. Keep only rows with roster data. */
keep if www != .


/* Collapse the three treatment dummies into one labelled categorical
   variable, which is convenient when generating tables. */
label define treatgroup 1 "Control" 2 "Info Only" 3 "Info + Cash"
gen treatment = .
replace treatment = 1 if control_vdc == 1
replace treatment = 2 if info_vdc == 1
replace treatment = 3 if cash_vdc == 1
label values treatment treatgroup

/* Pairwise comparison dummies for 2-way hypothesis tests. Each one is
   set to missing for observations in the arm that is not part of that
   comparison, so those observations drop out of the test. */

/* Cash vs. control: info-only observations excluded. */
gen cash_control = cash_vdc if info_vdc != 1

/* Info vs. control: cash observations excluded. */
gen info_control = info_vdc if cash_vdc != 1

/* Cash vs. info: control observations excluded. */
gen cash_info = cash_vdc if control_vdc != 1

save "`folder'/Data/midline_raw_fam", replace
